set wd and install packages

setwd("/Users/vh3/Documents/MCA/ANALYSIS_3")

#load required packages
require("Matrix")
library(scater, quietly = TRUE)
require("SingleCellExperiment")
options(stringsAsFactors = FALSE)
library(plotly)
library(devtools)

read in data, normalize two ways and look at PCAs

molecules <- read.table("/Users/vh3/Documents/MCA/forgithub/Expression_Matrices/Smartseq2/SS2_counts.csv", header = TRUE, sep = ",", row.names=1, stringsAsFactors = TRUE)
anno <- read.delim("/Users/vh3/Documents/MCA/forgithub/Expression_Matrices/Smartseq2/SS2_pheno.csv", header = TRUE, sep = ",")


cols <- c("bbSpz" = "navy", "EEF"="darkorange", "Merozoite"="lightpink", "oocyst"="steelblue", "ook" = "turquoise4", "ooSpz" = "lightskyblue", "Ring"="hotpink", "sgSpz"= "royalblue", "Schizont" = "violetred", "Male"="purple", "Female"="purple4", "ookoo" = "mediumturquoise", "Trophozoite"="violet")


mca.qc <- SingleCellExperiment(assays = list(
  counts = as.matrix(molecules),
  logcounts = log2(as.matrix(molecules) + 1)
), colData = anno)


mca.qc.ookoo <- mca.qc[, (colData(mca.qc)$ShortenedLifeStage == "ook") | (colData(mca.qc)$ShortenedLifeStage == "ookoo") | (colData(mca.qc)$ShortenedLifeStage == "oocyst")]
mca.qc.eef <- mca.qc[, (colData(mca.qc)$ShortenedLifeStage == "EEF")]
mca.qc.spz <- mca.qc[, (colData(mca.qc)$ShortenedLifeStage == "bbSpz") | 
                          (colData(mca.qc)$ShortenedLifeStage == "sgSpz") |
                          (colData(mca.qc)$ShortenedLifeStage == "ooSpz")]
mca.qc.idc <- mca.qc[, (colData(mca.qc)$ShortenedLifeStage == "Merozoite") | 
                       (colData(mca.qc)$ShortenedLifeStage == "Ring") |
                       (colData(mca.qc)$ShortenedLifeStage2 == "Schizont") | 
                       (colData(mca.qc)$ShortenedLifeStage2 == "Trophozoite")]
mca.qc.sex <- mca.qc[, (colData(mca.qc)$ShortenedLifeStage2 == "Male") | (colData(mca.qc)$ShortenedLifeStage2 == "Female")]


mca.qc.ookoo.tmm <- scater::normaliseExprs(mca.qc.ookoo, method = "TMM")
#mca.qc.ooc.tmm <- scater::normaliseExprs(mca.qc.ooc, method = "TMM")
mca.qc.eef.tmm <- scater::normaliseExprs(mca.qc.eef, method = "TMM")
mca.qc.spz.tmm <- scater::normaliseExprs(mca.qc.spz, method = "TMM")
mca.qc.idc.tmm <- scater::normaliseExprs(mca.qc.idc, method = "TMM")
mca.qc.sex.tmm <- scater::normaliseExprs(mca.qc.sex, method = "TMM")

mca.qc.tmm <- cbind(mca.qc.ookoo.tmm, mca.qc.eef.tmm)
#mca.qc.tmm <- cbind(mca.qc.tmm, mca.qc.eef.tmm)
mca.qc.tmm <- cbind(mca.qc.tmm, mca.qc.spz.tmm)
mca.qc.tmm <- cbind(mca.qc.tmm, mca.qc.idc.tmm)
mca.qc.tmm <- cbind(mca.qc.tmm, mca.qc.sex.tmm)

pca <- plotPCA(mca.qc.tmm, colour_by = "ShortenedLifeStage2", ntop=50, exprs_values="logcounts")
pcatab <- pca$data
ggplot(pcatab, aes(PC1, PC2)) + geom_point(aes(colour=factor(colour_by))) + scale_color_manual(values = cols) + theme_classic()

###This is what the PCA is from in FIG 1
mca.qc.tmmall <- scater::normaliseExprs(mca.qc, method = "TMM")
pca <- plotPCA(mca.qc.tmmall, colour_by = "ShortenedLifeStage2", exprs_values = "logcounts", ntop = 50)
pcatab <- pca$data
ggplot(pcatab, aes(PC1, PC2)) + geom_point(aes(colour=factor(colour_by))) + scale_color_manual(values = cols) + theme_classic()

pca <- plotPCA(mca.qc.tmmall, exprs_values = "logcounts", ntop = 50, colour_by = "ShortenedLifeStage2", ncomp=3)
dat <- pca$data
ggplot(dat, aes(PC1, PC2)) + geom_point(aes(colour=factor(colour_by))) + scale_color_manual(values = cols) + theme_classic()

ggplot(dat, aes(PC2, PC3)) + geom_point(aes(colour=factor(colour_by))) + scale_color_manual(values = cols) + theme_classic()

ggplot(dat, aes(PC1, PC3)) + geom_point(aes(colour=factor(colour_by))) + scale_color_manual(values = cols) + theme_classic()

plot_ly(dat, 
        x           = ~PC1, 
        y           = ~PC2, 
        z           = ~PC3,
        type        = 'scatter3d', 
        mode        = 'markers',
         #hoverinfo   = "text",
        #text       = ~paste0(gene_id, " ", gene_name),
        color      = ~colour_by,
        colors     = c("bbSpz" = "navy", "EEF"="darkorange", "Merozoite"="lightpink", "oocyst"="steelblue", "ook" = "turquoise4", "ooSpz" = "lightskyblue", "Ring"="hotpink", "sgSpz"= "royalblue", "Schizont" = "violetred", "Male"="purple", "Female"="purple4", "ookoo" = "mediumturquoise", "Trophozoite"="violet"),
        marker      = list(size = 3)
)

session info

session_info()
## Session info -------------------------------------------------------------
##  setting  value                       
##  version  R version 3.4.4 (2018-03-15)
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_GB.UTF-8                 
##  tz       Europe/London               
##  date     2019-01-24
## Packages -----------------------------------------------------------------
##  package              * version   date      
##  AnnotationDbi          1.40.0    2017-10-31
##  assertthat             0.2.0     2017-04-11
##  backports              1.1.2     2017-12-13
##  base                 * 3.4.4     2018-03-15
##  beeswarm               0.2.3     2016-04-25
##  bindr                  0.1.1     2018-03-13
##  bindrcpp             * 0.2.2     2018-03-29
##  Biobase              * 2.38.0    2017-10-31
##  BiocGenerics         * 0.24.0    2017-10-31
##  biomaRt                2.34.2    2018-01-20
##  bit                    1.1-14    2018-05-29
##  bit64                  0.9-7     2017-05-08
##  bitops                 1.0-6     2013-08-17
##  blob                   1.1.1     2018-03-25
##  colorspace             1.3-2     2016-12-14
##  compiler               3.4.4     2018-03-15
##  cowplot                0.9.3     2018-07-15
##  crayon                 1.3.4     2017-09-16
##  crosstalk              1.0.0     2016-12-21
##  data.table             1.11.4    2018-05-27
##  datasets             * 3.4.4     2018-03-15
##  DBI                    1.0.0     2018-05-02
##  DelayedArray         * 0.4.1     2017-11-07
##  devtools             * 1.13.6    2018-06-27
##  digest                 0.6.15    2018-01-28
##  dplyr                  0.7.6     2018-06-29
##  edgeR                  3.20.9    2018-02-27
##  evaluate               0.10.1    2017-06-24
##  GenomeInfoDb         * 1.14.0    2017-10-31
##  GenomeInfoDbData       1.0.0     2018-02-12
##  GenomicRanges        * 1.30.3    2018-02-26
##  ggbeeswarm             0.6.0     2017-08-07
##  ggplot2              * 2.2.1     2016-12-30
##  glue                   1.2.0     2017-10-29
##  graphics             * 3.4.4     2018-03-15
##  grDevices            * 3.4.4     2018-03-15
##  grid                   3.4.4     2018-03-15
##  gridExtra              2.3       2017-09-09
##  gtable                 0.2.0     2016-02-26
##  hms                    0.4.2     2018-03-10
##  htmltools              0.3.6     2017-04-28
##  htmlwidgets            1.2       2018-04-19
##  httpuv                 1.4.4.2   2018-07-02
##  httr                   1.3.1     2017-08-20
##  IRanges              * 2.12.0    2017-10-31
##  jsonlite               1.5       2017-06-01
##  knitr                  1.20      2018-02-20
##  labeling               0.3       2014-08-23
##  later                  0.7.3     2018-06-08
##  lattice                0.20-35   2017-03-25
##  lazyeval               0.2.1     2017-10-29
##  limma                  3.34.9    2018-02-22
##  locfit                 1.5-9.1   2013-04-20
##  magrittr               1.5       2014-11-22
##  Matrix               * 1.2-14    2018-04-09
##  matrixStats          * 0.54.0    2018-07-23
##  memoise                1.1.0     2017-04-21
##  methods              * 3.4.4     2018-03-15
##  mime                   0.5       2016-07-07
##  munsell                0.5.0     2018-06-12
##  parallel             * 3.4.4     2018-03-15
##  pillar                 1.2.3     2018-05-25
##  pkgconfig              2.0.1     2017-03-21
##  plotly               * 4.7.1     2017-07-29
##  plyr                   1.8.4     2016-06-08
##  prettyunits            1.0.2     2015-07-13
##  progress               1.2.0     2018-06-14
##  promises               1.0.1     2018-04-13
##  purrr                  0.2.5     2018-05-29
##  R6                     2.2.2     2017-06-17
##  Rcpp                   0.12.18   2018-07-23
##  RCurl                  1.95-4.10 2018-01-04
##  reshape2               1.4.3     2017-12-11
##  rhdf5                  2.22.0    2017-10-31
##  rjson                  0.2.20    2018-06-08
##  rlang                  0.2.1     2018-05-30
##  rmarkdown              1.10      2018-06-11
##  rprojroot              1.3-2     2018-01-03
##  RSQLite                2.1.1     2018-05-06
##  S4Vectors            * 0.16.0    2017-10-31
##  scales                 1.0.0     2018-08-09
##  scater               * 1.6.3     2018-03-20
##  shiny                  1.1.0     2018-05-17
##  shinydashboard         0.7.0     2018-03-21
##  SingleCellExperiment * 1.0.0     2017-10-31
##  stats                * 3.4.4     2018-03-15
##  stats4               * 3.4.4     2018-03-15
##  stringi                1.2.3     2018-06-12
##  stringr                1.3.1     2018-05-10
##  SummarizedExperiment * 1.8.1     2017-12-19
##  tibble                 1.4.2     2018-01-22
##  tidyr                  0.8.1     2018-05-18
##  tidyselect             0.2.4     2018-02-26
##  tools                  3.4.4     2018-03-15
##  tximport               1.6.0     2017-10-31
##  utils                * 3.4.4     2018-03-15
##  vipor                  0.4.5     2017-03-22
##  viridis                0.5.1     2018-03-29
##  viridisLite            0.3.0     2018-02-01
##  withr                  2.1.2     2018-05-25
##  XML                    3.98-1.11 2018-04-16
##  xtable                 1.8-2     2016-02-05
##  XVector                0.18.0    2017-10-31
##  yaml                   2.1.19    2018-05-01
##  zlibbioc               1.24.0    2017-10-31
##  source                          
##  Bioconductor                    
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.3)                  
##  local                           
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.4)                  
##  Bioconductor                    
##  Bioconductor                    
##  Bioconductor                    
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.0)                  
##  local                           
##  cran (@0.9.3)                   
##  CRAN (R 3.4.1)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  local                           
##  cran (@1.0.0)                   
##  Bioconductor                    
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.3)                  
##  cran (@0.7.6)                   
##  Bioconductor                    
##  CRAN (R 3.4.1)                  
##  Bioconductor                    
##  Bioconductor                    
##  Bioconductor                    
##  CRAN (R 3.4.1)                  
##  url                             
##  CRAN (R 3.4.2)                  
##  local                           
##  local                           
##  local                           
##  CRAN (R 3.4.1)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.1)                  
##  Bioconductor                    
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.3)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.2)                  
##  Bioconductor                    
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  cran (@0.54.0)                  
##  CRAN (R 3.4.0)                  
##  local                           
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  local                           
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.1)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.4)                  
##  cran (@0.2.5)                   
##  CRAN (R 3.4.0)                  
##  cran (@0.12.18)                 
##  CRAN (R 3.4.3)                  
##  CRAN (R 3.4.3)                  
##  Bioconductor                    
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.3)                  
##  CRAN (R 3.4.4)                  
##  Bioconductor                    
##  cran (@1.0.0)                   
##  Bioconductor                    
##  cran (@1.1.0)                   
##  CRAN (R 3.4.4)                  
##  Bioconductor                    
##  local                           
##  local                           
##  CRAN (R 3.4.4)                  
##  cran (@1.3.1)                   
##  Bioconductor                    
##  CRAN (R 3.4.3)                  
##  cran (@0.8.1)                   
##  CRAN (R 3.4.3)                  
##  local                           
##  Bioconductor                    
##  local                           
##  CRAN (R 3.4.0)                  
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.3)                  
##  Github (jimhester/withr@70d6321)
##  CRAN (R 3.4.4)                  
##  CRAN (R 3.4.0)                  
##  Bioconductor                    
##  CRAN (R 3.4.4)                  
##  Bioconductor